Os dados¶
Lendo os dados de todas as cidades disponíveis:
[1]:
import os
import pandas as pd
# Folder holding the per-city CSV files; each file name (without its
# extension) is used as the city name.
path = "./PG_IMT/DadosEpidemia/UKCities/"

# Keep only CSV files, in a stable order: os.listdir() returns every entry of
# the folder (sub-folders and other files included) in arbitrary order.
files = sorted(f for f in os.listdir(path) if f.lower().endswith(".csv"))
if not files:
    raise FileNotFoundError("No CSV files found in {!r}".format(path))

dataframes = []
for file in files:
    frame = pd.read_csv(os.path.join(path, file))
    # Tag every row with the city it belongs to.
    frame["city"] = os.path.splitext(file)[0]
    dataframes.append(frame)

# Stack all cities into a single frame and drop rows with any missing value.
data = pd.concat(dataframes, ignore_index=True)
data = data.dropna()
data.head()
[1]:
 | time | cases | births | pop | city |
---|---|---|---|---|---|
0 | 1944.016427 | 0 | 27.269231 | 28350.000000 | Bedwellty |
1 | 1944.054757 | 0 | 27.148291 | 28339.031079 | Bedwellty |
2 | 1944.093087 | 0 | 27.027352 | 28328.062157 | Bedwellty |
3 | 1944.131417 | 0 | 26.906413 | 28317.093236 | Bedwellty |
4 | 1944.169747 | 1 | 26.785473 | 28306.124314 | Bedwellty |
Determinando os parâmetros - SIR¶
Nesta seção, iremos utilizar a biblioteca `models`, que concentra todas as funções dos modelos SIR desenvolvidos nas seções anteriores.
Para uma única cidade¶
[2]:
from models import *
from bokeh.plotting import show

# Keep only the London rows: where() blanks every non-matching row and
# dropna() then removes them.
dataset = data.where(data["city"] == "London").dropna()

# Training series, extracted as NumPy arrays.
t = dataset["time"].to_numpy()
I = dataset["cases"].to_numpy()
B = dataset["births"].to_numpy()
S = dataset["pop"].to_numpy() - I + B
# Mean population over the whole series, truncated to an integer.
N = int(dataset["pop"].mean())

# Build the SIR model with that population.
model = ss.SIR(pop=N, focus=["I"])

# Fit the model on the series.
fit_data = model.fit_multiple(
    S, I, B, t,
    r_sens=[1000, 10],
    beta_sens=[100, 100],
)

# Build the result summary figure and display it.
fig = model.result_summary(out_plot=True, plot_size=[600, 400])
show(fig)
Windows starting at: [17, 46, 98, 124, 174, 222, 276, 331, 383, 429, 481]
Windows ending at: [47, 72, 124, 153, 206, 260, 311, 361, 415, 468, 519]
Window start cases: [41.0, 88.0, 113.0, 157.0, 308.0, 1521.0, 249.0, 183.0, 194.0, 256.0, 141.0]
New iter::: 1
├─ S(0) ─ 2550525.415201801 I(0) ─ 41.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.7605155319336778 ─ 0.5771663611353828
New iter::: 2
├─ S(0) ─ 2949891.618829986 I(0) ─ 88.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.652527725167639 ─ 0.5784638670213627
New iter::: 3
├─ S(0) ─ 3302261.375966816 I(0) ─ 113.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.5930110943411019 ─ 0.5853040022414066
New iter::: 4
├─ S(0) ─ 3360609.294403033 I(0) ─ 157.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.5510788084784277 ─ 0.5541593857111112
New iter::: 5
├─ S(0) ─ 3379912.492617072 I(0) ─ 308.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.4418921319691951 ─ 0.437900484416566
New iter::: 6
├─ S(0) ─ 3361115.5108282953 I(0) ─ 1521.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.2746852290519879 ─ 0.27916031863574764
New iter::: 7
├─ S(0) ─ 3321231.492546763 I(0) ─ 249.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.36599758110134534 ─ 0.35877292971764685
New iter::: 8
├─ S(0) ─ 3272868.3492476433 I(0) ─ 183.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.4917754139555685 ─ 0.4781307010467401
New iter::: 9
├─ S(0) ─ 3226812.132892697 I(0) ─ 194.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.5780416048451077 ─ 0.5570092885781249
New iter::: 10
├─ S(0) ─ 3199397.624806639 I(0) ─ 256.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.3327289237627916 ─ 0.315264257523117
New iter::: 11
├─ S(0) ─ 3185602.2749261744 I(0) ─ 141.0
├─ beta ─ 1 r ─ 0.1
├─ beta bound ─ 0.01 ─ 10
├─ r bound ─ 0.001 ─ 1.0
└─ Defined at: 0.4244705235353749 ─ 0.4056067055948321
[3]:
[3]:
3212255.4403247973
Estimando para todas as cidades¶
[ ]:
from models import *
# Names of every city present in the loaded data.
cities = data["city"].unique()
n_cities = len(cities)

# Folder receiving one summary image per city; created up front so saving
# does not depend on the folder already existing.
summary_folder = "./estimation_summaries/"
os.makedirs(summary_folder, exist_ok=True)

# Fit result of each city, keyed by city name.
par_data = dict()
for i, city in enumerate(cities):
    # Keep only the rows of this city.
    dataset = data.where(data["city"] == city).dropna()

    # Training series for this city.
    # NOTE(review): N is computed but never handed to ss.SIR below, whereas
    # the single-city cell passes pop=N -- confirm whether that is intended.
    N = dataset["pop"].mean()
    B = dataset["births"].to_numpy()
    I = dataset["cases"].to_numpy()
    S = dataset["pop"].to_numpy() - I + B
    t = dataset["time"].to_numpy()

    # Build the SIR model and fit it on the series.
    model = ss.SIR(verbose=False)
    par_data[city] = model.fit_multiple(S, I, B, t, out_type=1)

    # Save the summary of this fit in the summary folder.
    model.result_summary(
        save_results=True,
        plot_size=[700, 500],
        folder_path=summary_folder,
        file_name=city + "_summary.png")
    print("Finished - ", city, " - ", i+1, " of ", n_cities)
Obtendo informações de Lat e Long¶
[ ]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

geolocator = Nominatim(user_agent="UK_EpidemicModels_App")
# The public Nominatim service asks for at most one request per second;
# RateLimiter spaces the calls out and retries failed requests.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# NOTE(review): the query is the bare city name, with no country restriction;
# confirm that ambiguous names resolve to the intended place.
for city in cities:
    # Dots in the city identifier are replaced by spaces before querying.
    city_name = city.replace(".", " ")
    location = geocode(city_name)
    if location is None:
        # Fail with an explicit message instead of a TypeError on indexing None.
        raise LookupError("Could not geocode city {!r}".format(city_name))
    coordinates = (location.latitude, location.longitude)
    par_data[city]["lat"] = coordinates[0]
    par_data[city]["lon"] = coordinates[1]
    print("- Located: ", coordinates, " ", city_name)
Correlação dos modelos¶
Interpolando os modelos estimados¶
[ ]:
import numpy as np
import scipy.signal as scs
from scipy.interpolate import Rbf, InterpolatedUnivariateSpline
# Bounds of the common time grid: the smallest first time value and the
# largest last time value over all cities, each rounded to the nearest integer.
first_year = round(min(par_data[city]["time"][0] for city in cities))
last_year = round(max(par_data[city]["time"][-1] for city in cities))

# Number of samples of the common grid, shared by every city.
points = 30
time = np.linspace(first_year, last_year, points)

# Long-format table: one row per (city, grid time).
data_struc = {
    "lat": [],
    "lon": [],
    "city": [],
    "year": [],
    "beta": [],
    "r": [],
}

for city in cities:
    # Parameters estimated for this city and the times they refer to.
    beta_res = par_data[city]["pars"]["beta"]
    r_res = par_data[city]["pars"]["r"]
    year = par_data[city]["time"]

    # Gaussian radial basis function interpolants, with smoothing.
    beta_rbf = Rbf(year, beta_res, function='gaussian', smooth=2)
    r_rbf = Rbf(year, r_res, function='gaussian', smooth=2)

    # Per-city constants are repeated once per grid sample; the parameters
    # are evaluated on the common grid.
    data_struc["lat"] += [par_data[city]["lat"]] * points
    data_struc["lon"] += [par_data[city]["lon"]] * points
    data_struc["city"] += [city] * points
    data_struc["beta"] += beta_rbf(time).tolist()
    data_struc["r"] += r_rbf(time).tolist()
    data_struc["year"] += time.tolist()

# Assemble the data frame.
df = pd.DataFrame(data_struc)
Visualizando a interpolação¶
[ ]:
# Creating the parameters plot
#
from bokeh.models import Legend, ColumnDataSource, RangeTool, LinearAxis, Range1d
from bokeh.palettes import brewer, Inferno256
from bokeh.plotting import figure, show
from bokeh.layouts import column
from bokeh.io import output_notebook
def _param_figure(title):
    """Return a 700x500 figure with a logarithmic y axis and the given title."""
    return figure(
        title=title,
        y_axis_type="log",
        plot_width=700,
        plot_height=500,
    )


def _style_and_add_legend(fig, legend_items):
    """Apply the shared grid/axis styling to *fig* and add a legend on its right.

    Returns the Legend object that was added.
    """
    fig.grid.grid_line_alpha = 0
    fig.ygrid.band_fill_color = "olive"
    fig.ygrid.band_fill_alpha = 0.1
    fig.xaxis.axis_label = "Ano"
    legend = Legend(items=legend_items, location=(0, -10))
    # Clicking a legend entry hides the matching line.
    legend.click_policy = "hide"
    fig.add_layout(legend, "right")
    return legend


p_beta = _param_figure("Beta Parameter")
p_r = _param_figure("R Parameter")

legend_it, legend_it_r = [], []
n_cities = len(cities)
for i, city in enumerate(cities):
    # Spread the cities over the 256 entries of the Inferno palette.
    color = Inferno256[int((i / n_cities) * 256)]
    df_filt = df.where(df["city"] == city).dropna()
    years = df_filt["year"].to_list()
    line_style = dict(line_width=4, line_cap="round", color=color)

    # Absolute values are plotted; the y axes are logarithmic, which is
    # presumably why the sign is dropped.
    c = p_beta.line(years, abs(df_filt["beta"]), **line_style)
    cr = p_r.line(years, abs(df_filt["r"]), **line_style)

    legend_it.append((city, [c]))
    legend_it_r.append((city, [cr]))

legend = _style_and_add_legend(p_beta, legend_it)
legend = _style_and_add_legend(p_r, legend_it_r)

# Show both figures stacked vertically.
show(column(p_beta, p_r))
Criando os data frames para a matriz de correlação¶
[ ]:
# For each parameter, a mapping city -> {year: value}.
beta_data = {}
r_data = {}
for city in cities:
    # Rows of this city only.
    df_filt = df.where(df["city"] == city).dropna()
    years = df_filt["year"].to_list()
    beta_data[city] = dict(zip(years, df_filt["beta"].to_list()))
    r_data[city] = dict(zip(years, df_filt["r"].to_list()))

# One column per city, one row per year, rows ordered by year.
# The index is then replaced by the year multiplied by 1000, cast to int.
r_df = pd.DataFrame(r_data).sort_index()
r_df.index = (r_df.index.to_numpy() * 1000).astype(int)

beta_df = pd.DataFrame(beta_data).sort_index()
beta_df.index = (beta_df.index.to_numpy() * 1000).astype(int)
beta_df.head()
Correlação do parâmetro \(\beta\)¶
[ ]:
import seaborn as sns
import matplotlib.pyplot as plt
# Heatmap of the pairwise correlation between the columns of beta_df,
# annotated with the values rounded to two decimals.
f, ax = plt.subplots(figsize=(18, 15))
beta_corr = beta_df.corr()
hm = sns.heatmap(
    beta_corr.round(2),
    annot=True,
    ax=ax,
    cmap="coolwarm",
    fmt='.2f',
    linewidths=.05,
)
f.subplots_adjust(top=0.93)
# Bound to its own name rather than `t`, so the time array `t` created by the
# fitting cells is not overwritten; assigning the result also keeps the
# notebook from echoing the returned object.
heatmap_title = f.suptitle('Beta Parameter - Features Correlation Heatmap', fontsize=14)
Correlação do parâmetro \(r\)¶
[ ]:
# Heatmap of the pairwise correlation between the columns of r_df,
# annotated with the values rounded to two decimals.
f, ax = plt.subplots(figsize=(18, 15))
r_corr = r_df.corr()
hm = sns.heatmap(
    r_corr.round(2),
    annot=True,
    ax=ax,
    cmap="coolwarm",
    fmt='.2f',
    linewidths=.05,
)
f.subplots_adjust(top=0.93)
# Bound to its own name rather than `t`, so the time array `t` created by the
# fitting cells is not overwritten; assigning the result also keeps the
# notebook from echoing the returned object.
heatmap_title = f.suptitle('r Parameter - Features Correlation Heatmap', fontsize=14)
Correlação do modelo¶
[ ]:
# Heatmap of the element-wise mean of the r and beta correlation matrices,
# annotated with the values rounded to two decimals.
f, ax = plt.subplots(figsize=(18, 15))
model_corr = 0.5 * (r_corr + beta_corr)
hm = sns.heatmap(
    model_corr.round(2),
    annot=True,
    ax=ax,
    cmap="coolwarm",
    fmt='.2f',
    linewidths=.05,
)
f.subplots_adjust(top=0.93)
# Bound to its own name rather than `t`, so the time array `t` created by the
# fitting cells is not overwritten; assigning the result also keeps the
# notebook from echoing the returned object.
heatmap_title = f.suptitle('Model - Features Correlation Heatmap', fontsize=14)